{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "device:  cpu\n",
      "max_steps:  999\n",
      "policy:  Policy(\n",
      "  (base): MLPBase(\n",
      "    (actor): Sequential(\n",
      "      (0): Linear(in_features=2, out_features=64, bias=True)\n",
      "      (1): Tanh()\n",
      "      (2): Linear(in_features=64, out_features=64, bias=True)\n",
      "      (3): Tanh()\n",
      "    )\n",
      "    (critic): Sequential(\n",
      "      (0): Linear(in_features=2, out_features=64, bias=True)\n",
      "      (1): Tanh()\n",
      "      (2): Linear(in_features=64, out_features=64, bias=True)\n",
      "      (3): Tanh()\n",
      "    )\n",
      "    (critic_linear): Linear(in_features=64, out_features=1, bias=True)\n",
      "  )\n",
      "  (dist): DiagGaussian(\n",
      "    (fc_mean): Linear(in_features=64, out_features=1, bias=True)\n",
      "    (logstd): AddBias()\n",
      "  )\n",
      ")\n",
      "envs.observation_space.shape:  (2,) , len(obs_shape):  1\n",
      "envs.action_space:  Box(-1.0, 1.0, (1,), float32) , action_space.shape[0]:  1\n"
     ]
    }
   ],
   "source": [
    "import gym\n",
    "import numpy as np\n",
    "import torch\n",
    "import time\n",
    "from collections  import deque\n",
    "from parallelEnv import parallelEnv\n",
    "from envs import make_vec_envs\n",
    "from model import Policy\n",
    "from utils import get_render_func, get_vec_normalize\n",
    "\n",
    "device = torch.device(\"cpu\")\n",
    "print('device: ', device)\n",
    "\n",
    "seed = 0 \n",
    "\n",
    "## model Policy uses MLPBase\n",
    "envs = parallelEnv('MountainCarContinuous-v0', n=8, seed=seed) ## weights created by n = 16\n",
    "\n",
    "max_steps = envs.max_steps\n",
    "print('max_steps: ', max_steps)\n",
    "\n",
    "policy = Policy(envs.observation_space.shape, envs.action_space,\\\n",
    "        base_kwargs={'recurrent': False})\n",
    "\n",
    "print('policy: ', policy)\n",
    "policy.to(device)\n",
    "\n",
    "num_processes = 1\n",
    "env_venv = make_vec_envs('MountainCarContinuous-v0', \\\n",
    "                    seed + 1000, num_processes,\n",
    "                    None, None, False, device=device, allow_early_resets=False)\n",
    "\n",
    "print('envs.observation_space.shape: ', envs.observation_space.shape, \\\n",
    "      ', len(obs_shape): ', len(envs.observation_space.shape))\n",
    "print('envs.action_space: ',  envs.action_space, \\\n",
    "      ', action_space.shape[0]: ', envs.action_space.shape[0])\n",
    "\n",
    "\n",
    "def load_test(model):\n",
    "    model.base = torch.load('dir_save_test\\we0_model_base_final.pth')\n",
    "    model.base.actor.load_state_dict(torch.load('dir_save_test\\we0_actor_final.pth'))\n",
    "    # model.base.critic.load_state_dict(torch.load('dir_save\\we0_critic_final.pth'))\n",
    "    # model.base.critic_linear.load_state_dict(torch.load('dir_save\\we0_critic_linear_final.pth'))\n",
    "    model.dist = torch.load('dir_save_test\\we0_model_dist_final.pth')\n",
    "    \n",
    "load_test(model = policy)    \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Episode 1 \tScore: 90.77, Avg.Score: 90.77, \tTime: 00:00:06\n",
      "Episode 2 \tScore: 91.36, Avg.Score: 91.07, \tTime: 00:00:07\n",
      "Episode 3 \tScore: 92.51, Avg.Score: 91.55, \tTime: 00:00:05\n",
      "Episode 4 \tScore: 89.01, Avg.Score: 90.91, \tTime: 00:00:07\n",
      "Episode 5 \tScore: 88.78, Avg.Score: 90.49, \tTime: 00:00:08\n",
      "Episode 6 \tScore: 88.87, Avg.Score: 90.22, \tTime: 00:00:05\n",
      "Episode 7 \tScore: 89.64, Avg.Score: 90.14, \tTime: 00:00:07\n"
     ]
    }
   ],
   "source": [
    "## No CUDA, only CPU\n",
    "\n",
    "def play_VecEnv(env, model, num_episodes):\n",
    "\n",
    "    obs = env.reset()\n",
    "    obs = torch.Tensor(obs)\n",
    "    obs = obs.float()\n",
    "        \n",
    "    recurrent_hidden_states = torch.zeros(1, model.recurrent_hidden_state_size)\n",
    "    \n",
    "    masks = torch.zeros(1, 1)\n",
    "    \n",
    "    scores_deque = deque(maxlen=100)\n",
    "\n",
    "    render_func = get_render_func(env)\n",
    "        \n",
    "    for i_episode in range(1, num_episodes+1):     \n",
    "\n",
    "        time_start = time.time()\n",
    "        total_reward = np.zeros(num_processes)\n",
    "        timestep = 0\n",
    "\n",
    "        done = False\n",
    "        \n",
    "        while not done:\n",
    "        \n",
    "            with torch.no_grad():\n",
    "                value, action, _, recurrent_hidden_states = \\\n",
    "                    model.act(obs, recurrent_hidden_states, masks, deterministic=False) # obs = state\n",
    "                            \n",
    "\n",
    "            render_func()\n",
    "            \n",
    "            obs, reward, done, _ = env.step(action.unsqueeze(1))\n",
    "            obs = torch.Tensor(obs)\n",
    "            obs = obs.float()\n",
    "\n",
    "            reward = reward.detach().numpy()\n",
    "            masks.fill_(0.0 if done else 1.0)\n",
    "            \n",
    "            total_reward += np.mean(reward)\n",
    "            \n",
    "            time.sleep(0.04)\n",
    "            \n",
    "            timestep += 1\n",
    "            \n",
    "            if done.all() == True or timestep + 1 == max_steps: ##   999:\n",
    "                break\n",
    "\n",
    "        s = (int)(time.time() - time_start)\n",
    "        \n",
    "        scores_deque.append(total_reward)        \n",
    "        avg_score = np.mean(scores_deque)\n",
    "                    \n",
    "        print('Episode {} \\tScore: {:.2f}, Avg.Score: {:.2f}, \\tTime: {:02}:{:02}:{:02}'\\\n",
    "                  .format(i_episode, np.mean(total_reward), avg_score,  s//3600, s%3600//60, s%60))\n",
    "    \n",
    "play_VecEnv(env=env_venv, model=policy, num_episodes=7)   \n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "env_venv.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ml2-kernel",
   "language": "python",
   "name": "ml2-kernel"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
